# -------------------------------------------------------------------- #
# Code for replicating resuts in the following paper:
# "Majoritarian Politics and Hate Crimes Against Religious Minorities: 
# Evidence from India, 2009–2018"
# -------------------------------------------------------------------- #


# Clear workspace
rm(list = ls())

# Libraries
library(tidyverse)
library(foreign)
library(ggplot2)
library(ggpubr)
library(stargazer)
library(sandwich)
library(lmtest)
library(car)
library(fastDummies)
library(plm)
library(Formula)
library(broom)
library(pglm)
library(AER)
library(imputeTS)
library(miceadds)
library(clubSandwich)
library(clusterSEs)
library(MASS)
library(margins)
library(ivtools)
library(boot)
library(modelr)
library(margins)
library(RConics)
library(xtable)



# -------------------------------------------------------- #
# -------------------- DATA SETS ------------------------- #

# -- Data on Hate Crimes and control variables
d2 <- read.csv("HC-Data-Replication.csv",header = TRUE)


# --- Data on Lok Sabha Elections
ls <- read.csv("lspolls.csv",header = TRUE)



# ---------------------------------------------- #
# ---------------- TABLES ---------------------- #


# ---- TABLE 1: Total hate crimes by community
d2_hc <- d2 %>%
  group_by(after) %>%
  summarise(hc_mus = sum(muslim, na.rm = TRUE),
            hc_ch = sum(christian, na.rm = TRUE),
            hc_hin = sum(hindu, na.rm = TRUE),
            hc_sikh = sum(sikh, na.rm = TRUE),
            hc_unk = sum(unknown, na.rm = TRUE),
            hc_minority = sum(hcam1, na.rm = TRUE),
            hc_diff = sum(hc_d, na.rm = TRUE)) 

# Latex code
stargazer(t(d2_hc), type = "latex", 
          summary = FALSE)




#-------------------------------------------------- #
# -------------- TABLE 2: Basic DD ---------------- #

# Year and State FE
resbn1 <- miceadds::lm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) ,
  data = d2, cluster = "state"
)
resbn1_obs <- nobs(resbn1$lm_res)
myvcov <- vcov(resbn1)
resbn1_pr <- coeftest(resbn1, vcov. = myvcov)
resbn1_pr[c("after:treat3"),]
resbn1_r2 <- round(
  summary(resbn1$lm_res)$r.squared,digits = 3
)

# Add time varying controls
resbn2 <- miceadds::lm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn ,
  data = d2, cluster = "state"
)
resbn2_obs <- nobs(resbn2$lm_res)
myvcov <- vcov(resbn2)
resbn2_pr <- coeftest(resbn2, vcov. = myvcov)
resbn2_pr[c("after:treat3"),]
resbn2_r2 <- round(
  summary(resbn2$lm_res)$r.squared,digits = 3
)

# Add pre-treatment controls 
resbn3 <- miceadds::lm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after,
  data = d2, cluster = "state"
)
resbn3_obs <- nobs(resbn3$lm_res)
myvcov <- vcov(resbn3)
resbn3_pr <- coeftest(resbn3, vcov. = myvcov)
resbn3_pr[c("after:treat3"),]
resbn3_r2 <- round(
  summary(resbn3$lm_res)$r.squared,digits = 3
)

# Add log-crimes 
resbn4 <- miceadds::lm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d2, cluster = "state"
)

resbn4_obs <- nobs(resbn4$lm_res)
myvcov <- vcov(resbn4)
resbn4_pr <- coeftest(resbn4, vcov. = myvcov)
resbn4_pr[c("after:treat3"),]
resbn4_r2 <- round(
  summary(resbn4$lm_res)$r.squared,digits = 3
)

# Remove UP
d3 <- d2[d2$state!="Uttar Pradesh",]
resbn5 <- miceadds::lm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d3, cluster = "state"
)
resbn5_obs <- nobs(resbn5$lm_res)
myvcov <- vcov(resbn5)
resbn5_pr <- coeftest(resbn5, vcov. = myvcov)
resbn5_pr[c("after:treat3"),]
resbn5_r2 <- round(
  summary(resbn5$lm_res)$r.squared,digits = 3
)


# Remove Manipur, Meghalaya, Mizoram, Nagaland, Sikkim 
d3 <- d2[d2$state!="Manipur" &
           d2$state!="Meghalaya" &
           d2$state!="Mizoram" &
           d2$state!="Nagaland" &
           d2$state!="Sikkim",]
resbn6 <- miceadds::lm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d3, cluster = "state"
)
resbn6_obs <- nobs(resbn6$lm_res)
myvcov <- vcov(resbn6)
resbn6_pr <- coeftest(resbn6, vcov. = myvcov)
resbn6_pr[c("after:treat3"),]
resbn6_r2 <- round(
  summary(resbn6$lm_res)$r.squared,digits = 3
)


# Additional rows to add to results
reg_obs <- c("Observations", resbn1_obs,resbn2_obs,
             resbn3_obs,resbn4_obs,resbn5_obs,resbn6_obs)
reg_r2 <- c("R2", resbn1_r2,resbn2_r2,
            resbn3_r2,resbn4_r2,resbn5_r2,resbn6_r2)
reg_fes <- c("State FE", "Y", "Y", "Y","Y","Y","Y")
reg_fet <- c("Year FE", "Y", "Y", "Y","Y","Y","Y")
reg_con1 <- c("Time Varying Controls", "", "Y", "Y","Y","Y","Y")
reg_con2 <- c("Pre-Treatment Controls", "", "", "Y","Y","Y","Y")
reg_crm <- c("Log Crime Incidence", "", "", "","Y","Y","Y")
reg_up <- c("Drop Uttar Pradesh", "", "", "","","Y","")
reg_ne <- c("Drop NE States", "", "", "","","","Y")

# See results on screen
stargazer(
  resbn1_pr, resbn2_pr, resbn3_pr, 
  resbn4_pr,resbn5_pr, resbn6_pr,
  keep = c("after:treat3"),
  covariate.labels = c("After X TREAT"),
  type = "text", style = "qje",
  add.lines = list(reg_obs, reg_r2,reg_fes,
                   reg_fet,reg_con1,
                   reg_con2,reg_crm,reg_up,
                   reg_ne),
  dep.var.caption = "anti-minority hate crimes"
)


# Results for latex file
stargazer(
  resbn1_pr, resbn2_pr, resbn3_pr, 
  resbn4_pr,resbn5_pr, resbn6_pr,
  keep = c("after:treat3"),
  covariate.labels = c("After X TREAT"),
  style = "qje",
  add.lines = list(reg_obs, reg_r2,reg_fes,
                   reg_fet,reg_con1,
                   reg_con2,reg_crm,reg_up,
                   reg_ne),
  dep.var.caption = "anti-minority hate crimes"
)



# ------- Bootstrapping P-Values ------------ #
# --- Method: Wild cluster boostrapping
# Note: this is manually added to Table 2 (last row)

# -- Create data set
d4 <- pdata.frame(d2, index = c("state", "year"))
d4$time <- as.numeric(d4$year)
d4$mytreat <- d4$treat3*d4$after


# Create vector to store p-values
mypvals <- base::rep(0,times=6)


# --- Model 1
res <- plm(
  hcam1 ~ after:treat3 + factor(year),
  data = d4, model="within"
)

res_bs <- cluster.wild.plm(
  mod=res, dat=d4, cluster="group", ci.level = 0.95,
  boot.reps = 1000, report = TRUE, prog.bar = TRUE,
  seed = 99999
)

mypvals[1] <- res_bs$p.values["after:treat3",]


# --- Model 2
# Drop 2018 (because popn not available)
d5 <- d4 %>%
  filter(time<=9) %>%
  as.data.frame()

# Respecify as panel data
d6 <- pdata.frame(
  d5, index = c("state", "year")
)

res <- plm(
  hcam1 ~ after:treat3 + factor(year) + 
    lpcnsdp + lpopn,
  data = d6, model="within"
)

res_bs <- cluster.wild.plm(
  mod=res, dat=d6, cluster="group", 
  ci.level = 0.95,boot.reps = 1000, report = TRUE, 
  prog.bar = TRUE,seed = 99999
)

mypvals[2] <- res_bs$p.values["after:treat3",]


# --- Model 3
res <- plm(
  hcam1 ~ after:treat3 + factor(year) + 
    lpcnsdp + lpopn + urb_sh:after +
    m_sh:after + lit_prop:after,
  data = d6, model="within"
)

res_bs <- cluster.wild.plm(
  mod=res, dat=d6, cluster="group", ci.level = 0.95,
  boot.reps = 1000, report = TRUE, prog.bar = TRUE,
  seed = 99999
)

mypvals[3] <- res_bs$p.values["after:treat3",]


# --- Model 4
# Drop 2017, 2018 (because crime rate not available)
d7 <- d4 %>%
  filter(time<=8) %>%
  as.data.frame()

# Respecify as panel data
d8 <- pdata.frame(
  d7, index = c("state", "year")
)

res <- plm(
  hcam1 ~ after:treat3 + factor(year) + 
    lpcnsdp + lpopn + urb_sh:after +
    m_sh:after + lit_prop:after + lcrm,
  data = d8, model="within"
)

res_bs <- cluster.wild.plm(
  mod=res, dat=d8, cluster="group", ci.level = 0.95,
  boot.reps = 1000, report = TRUE, prog.bar = TRUE,
  seed = 99999
)

mypvals[4] <- res_bs$p.values["after:treat3",]


# --- Model 5
res <- plm(
  hcam1 ~ after:treat3 + factor(year) + 
    lpcnsdp + lpopn + urb_sh:after +
    m_sh:after + lit_prop:after + lcrm,
  data = d8[d8$state!="Uttar Pradesh",], model="within"
)

res_bs <- cluster.wild.plm(
  mod=res, dat=d8[d8$state!="Uttar Pradesh",], 
  cluster="group", ci.level = 0.95,
  boot.reps = 1000, report = TRUE, prog.bar = TRUE,
  seed = 99999
)

mypvals[5] <- res_bs$p.values["after:treat3",]


# --- Model 6
res <- plm(
  hcam1 ~ after:treat3 + factor(year) + 
    lpcnsdp + lpopn + urb_sh:after +
    m_sh:after + lit_prop:after + lcrm,
  data = d8[d8$state!="Manipur" &
              d8$state!="Meghalaya" &
              d8$state!="Mizoram" &
              d8$state!="Nagaland" &
              d8$state!="Sikkim",], 
  model="within"
)

res_bs <- cluster.wild.plm(
  mod=res, dat=d8[d8$state!="Manipur" &
                    d8$state!="Meghalaya" &
                    d8$state!="Mizoram" &
                    d8$state!="Nagaland" &
                    d8$state!="Sikkim",], 
  cluster="group", ci.level = 0.95,
  boot.reps = 1000, report = TRUE, prog.bar = TRUE,
  seed = 99999
)

mypvals[6] <- res_bs$p.values["after:treat3",]

# Print vector of p-value
# Note: this is manually added to Table 2
(mypvals)




#-------------------------------------------------- #
# -------- TABLE 3: Parallel Trends before 2014 --- #

# Choose data for years before 2014
d5 <- d2[d2$year<=2013,]
d5$ltime <- d5$year - 2008

# Year and State FE
resbn1 <- miceadds::lm.cluster(
  formula = hcam1 ~ treat3:ltime + factor(year) + 
    factor(state) ,
  data = d5, cluster = "state"
)
resbn1_obs <- nobs(resbn1$lm_res)
myvcov <- vcov(resbn1)
resbn1_pr <- coeftest(resbn1, vcov. = myvcov)
resbn1_pr[c("treat3:ltime"),]


# Add time varying controls
resbn2 <- miceadds::lm.cluster(
  formula = hcam1 ~ treat3:ltime + factor(year) + 
    factor(state) + lpcnsdp + lpopn ,
  data = d5, cluster = "state"
)
resbn2_obs <- nobs(resbn2$lm_res)
myvcov <- vcov(resbn2)
resbn2_pr <- coeftest(resbn2, vcov. = myvcov)
resbn2_pr[c("treat3:ltime"),]


# Add log crime 
resbn3 <- miceadds::lm.cluster(
  formula = hcam1 ~ treat3:ltime + factor(year) + 
    factor(state) + lpcnsdp + lpopn + lcrm,
  data = d5, cluster = "state"
)
resbn3_obs <- nobs(resbn3$lm_res)
myvcov <- vcov(resbn3)
resbn3_pr <- coeftest(resbn3, vcov. = myvcov)
resbn3_pr[c("treat3:ltime"),]



# Additional rows to add to results
reg_obs <- c("Observations", resbn1_obs,resbn2_obs,
             resbn3_obs)
reg_fes <- c("State FE", "Y", "Y", "Y")
reg_fet <- c("Year FE", "Y", "Y", "Y")
reg_con1 <- c("Time Varying Controls", "", "Y", "Y")
reg_crm <- c("Log Crime Incidence", "", "", "Y")

# See results on screen
stargazer(
  resbn1_pr, resbn2_pr, resbn3_pr,
  keep = c("treat3:ltime"),
  covariate.labels = c("After X TREND"),
  type = "text", style = "qje",
  add.lines = list(reg_obs, reg_fes,
                   reg_fet,reg_con1,
                   reg_crm),
  dep.var.caption = "anti-minority hate crimes"
)


# Results for latex file
stargazer(
  resbn1_pr, resbn2_pr, resbn3_pr,
  keep = c("treat3:ltime"),
  covariate.labels = c("After X TREND"),
  style = "qje",
  add.lines = list(reg_obs, reg_fes,
                   reg_fet,reg_con1,
                   reg_crm),
  dep.var.caption = "anti-minority hate crimes"
)



#-------------------------------------------------- #
# ---- TABLE 4: Placebo in terms of community ----- #

# Year and State FE
resbn1 <- miceadds::lm.cluster(
  formula = hindu_hc ~ after + after:treat3 + 
    factor(state) + factor(year),
  data = d2, cluster = "state"
)
resbn1_obs <- nobs(resbn1$lm_res)
myvcov <- vcov(resbn1)
resbn1_pr <- coeftest(resbn1, vcov. = myvcov)
resbn1_pr[c("after:treat3"),]


# Add time varying controls
resbn2 <- miceadds::lm.cluster(
  formula = hindu_hc ~ after + after:treat3 + 
    lpcnsdp + lpopn +
    factor(state) + factor(year),
  data = d2, cluster = "state"
)
resbn2_obs <- nobs(resbn2$lm_res)
myvcov <- vcov(resbn2)
resbn2_pr <- coeftest(resbn2, vcov. = myvcov)
resbn2_pr[c("after:treat3"),]


# Add pre-treatment controls 
resbn3 <- miceadds::lm.cluster(
  formula = hindu_hc ~ after + after:treat3 + 
    lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after +
    factor(state) + factor(year),
  data = d2, cluster = "state"
)
resbn3_obs <- nobs(resbn3$lm_res)
myvcov <- vcov(resbn3)
resbn3_pr <- coeftest(resbn3, vcov. = myvcov)
resbn3_pr[c("after:treat3"),]


# Add log-crimes 
resbn4 <- miceadds::lm.cluster(
  formula = hindu_hc ~ after + after:treat3 + 
    lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm +
    factor(state) + factor(year),
  data = d2, cluster = "state"
)
resbn4_obs <- nobs(resbn4$lm_res)
myvcov <- vcov(resbn4)
resbn4_pr <- coeftest(resbn4, vcov. = myvcov)
resbn4_pr[c("after:treat3"),]


# Additional rows to add to results
reg_obs <- c("Observations", resbn1_obs,resbn2_obs,
             resbn3_obs,resbn4_obs)
reg_fes <- c("State FE", "Y", "Y", "Y","Y")
reg_fet <- c("Year FE", "Y", "Y", "Y","Y")
reg_con1 <- c("Time Varying Controls", "", "Y", "Y","Y")
reg_con2 <- c("Pre-Treatment Controls", "", "", "Y","Y")
reg_crm <- c("Log Crime Incidence", "", "", "","Y")

# See results on screen
stargazer(
  resbn1_pr, resbn2_pr, resbn3_pr, resbn4_pr,
  keep = c("after:treat3"),
  covariate.labels = c("After X TREAT"),
  type = "text", style = "qje",
  add.lines = list(reg_obs,reg_fes,
                   reg_fet,reg_con1,
                   reg_con2,reg_crm),
  dep.var.caption = "anti-minority hate crimes"
)


# Results for latex file
stargazer(
  resbn1_pr, resbn2_pr, resbn3_pr, resbn4_pr,
  keep = c("after:treat3"),
  covariate.labels = c("After X TREAT"),
  style = "qje",
  add.lines = list(reg_obs,reg_fes,
                   reg_fet,reg_con1,
                   reg_con2,reg_crm),
  dep.var.caption = "anti-minority hate crimes"
)


#-------------------------------------------------- #
# ------- TABLE 5: Quasi Poisson Regression ------- #

# Year and State FE
resbn1 <- miceadds::glm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) ,
  data = d2, family = "quasipoisson",
  cluster = "state"
)
resbn1_obs <- nobs(resbn1$glm_res)
myvcov <- vcov(resbn1)
resbn1_pr <- coeftest(resbn1, vcov. = myvcov)
resbn1_pr[c("after:treat3"),]


# Add time varying controls
resbn2 <- miceadds::glm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn ,
  data = d2, family = "quasipoisson",
  cluster = "state"
)
resbn2_obs <- nobs(resbn2$glm_res)
myvcov <- vcov(resbn2)
resbn2_pr <- coeftest(resbn2, vcov. = myvcov)
resbn2_pr[c("after:treat3"),]


# Add pre-treatment controls 
resbn3 <- miceadds::glm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after,
  data = d2, family = "quasipoisson",
  cluster = "state"
)
resbn3_obs <- nobs(resbn3$glm_res)
myvcov <- vcov(resbn3)
resbn3_pr <- coeftest(resbn3, vcov. = myvcov)
resbn3_pr[c("after:treat3"),]


# Add log-crimes 
resbn4 <- miceadds::glm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d2, family = "quasipoisson",
  cluster = "state"
)


resbn4_obs <- nobs(resbn4$glm_res)
myvcov <- vcov(resbn4)
resbn4_pr <- coeftest(resbn4, vcov. = myvcov)
resbn4_pr[c("after:treat3"),]


# Remove UP
d3 <- d2[d2$state!="Uttar Pradesh",]
resbn5 <- miceadds::glm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d3, family = "quasipoisson",
  cluster = "state"
)
resbn5_obs <- nobs(resbn5$glm_res)
myvcov <- vcov(resbn5)
resbn5_pr <- coeftest(resbn5, vcov. = myvcov)
resbn5_pr[c("after:treat3"),]


# Remove NE states
d3 <- d2[d2$state!="Manipur" &
           d2$state!="Meghalaya" &
           d2$state!="Mizoram" &
           d2$state!="Nagaland" &
           d2$state!="Sikkim",]
resbn6 <- miceadds::glm.cluster(
  formula = hcam1 ~ after + after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d3, family = "quasipoisson",
  cluster = "state"
)
resbn6_obs <- nobs(resbn6$glm_res)
myvcov <- vcov(resbn6)
resbn6_pr <- coeftest(resbn6, vcov. = myvcov)
resbn6_pr[c("after:treat3"),]


# Additional rows to add to results
reg_obs <- c("Observations", resbn1_obs,resbn2_obs,
             resbn3_obs,resbn4_obs,resbn5_obs,resbn6_obs)
reg_fes <- c("State FE", "Y", "Y", "Y","Y","Y","Y")
reg_fet <- c("Year FE", "Y", "Y", "Y","Y","Y","Y")
reg_con1 <- c("Time Varying Controls", "", "Y", "Y","Y","Y","Y")
reg_con2 <- c("Pre-Treatment Controls", "", "", "Y","Y","Y","Y")
reg_crm <- c("Log Crime Incidence", "", "", "","Y","Y","Y")
reg_up <- c("Drop Uttar Pradesh", "", "", "","","Y","")
reg_ne <- c("Drop NE States", "", "", "","","","Y")

# See results on screen
stargazer(
  resbn1_pr, resbn2_pr, resbn3_pr, resbn4_pr,resbn5_pr,resbn6_pr,
  keep = c("after:treat3"),
  covariate.labels = c("After X TREAT"),
  type = "text", style = "qje",
  add.lines = list(reg_obs, reg_fes,
                   reg_fet,reg_con1,
                   reg_con2,reg_crm,reg_up,reg_ne),
  dep.var.caption = "anti-minority hate crimes"
)


# Results for latex file
stargazer(
  resbn1_pr, resbn2_pr, resbn3_pr, resbn4_pr,resbn5_pr,resbn6_pr,
  keep = c("after:treat3"),
  covariate.labels = c("After X TREAT"),
  style = "qje",
  add.lines = list(reg_obs, reg_fes,
                   reg_fet,reg_con1,
                   reg_con2,reg_crm,reg_up,reg_ne),
  dep.var.caption = "anti-minority hate crimes"
)



#-------------------------------------------------- #
# ----- TABLE (Appendix) B1: Summary Statistics --- #
# Variables
d3 <- d2 %>%
  dplyr::select(
    hcam1,hvs_ss,hvs1_ss,lcrm,
    lpcnsdp,lpopn,urb_sh,lit_prop,
    m_sh
  ) %>%
  as.data.frame()

# Variable Labels
mylabels <- c("Hate crimes against rel minorities",
              "BJP's vote share in 2014 (%)",
              "BJP's vote share in 2009 (%)",
              "Log-Crime Incidence (number)",
              "Log-PCNSDP (2012-13 rupess)",
              "Log Mid-year Population (lakhs)",
              "Share of Urban Population in 2011 (%)",
              "Literacy Rate in 2011 (%)",
              "Share of Muslim Population in 2011 (%)"
)


# Summary statistics
stargazer(d3, type = "text", covariate.labels = mylabels, 
          digits = 2, summary.stat = c("n","min","mean","max","sd"))

stargazer(d3, type = "latex", covariate.labels = mylabels, 
          digits = 2, summary.stat = c("n","min","mean","max","sd"))

# Average anti-minority hate crimes before and after 2014
d2 %>%
  group_by(after) %>%
  summarize(hcam1_m=mean(hcam1, na.rm = TRUE))



#-------------------------------------------------- #
# --- TABLE (Appendix) B2: 
# ------- Difference in means before 2014 --------- #

# Variables
d3 <- d2 %>%
  dplyr::filter(year<2014) %>%
  dplyr::select(
    hcam1,hvs,hvs1,lcrm,
    lpcnsdp,lpopn,urb_sh,lit_prop,
    m_sh,
    # Treatment dummy
    treat3
  ) %>%
  as.data.frame()

# Create matrix to store results
n <- ncol(d3)
tres <- matrix(data = NA, nrow = (n-1), ncol = 3)

for(i in 1:(n-1)) {
  d0 <- as.data.frame(cbind(d3[,i],d3[,n]))
  d1 <- d0[complete.cases(d0),]
  tres[i,1] <- round(t.test(d1[,1] ~ d1[,2])$estimate[1],digits = 3)
  tres[i,2] <- round(t.test(d1[,1] ~ d1[,2])$estimate[2], digits = 3)
  tres[i,3] <- round(t.test(d1[,1] ~ d1[,2])$p.value, digits = 3)
}

colnames(tres) <- c("Non-BJP","BJP","p-value")
rownames(tres) <- mylabels


# See results on screen
stargazer(tres, type = "text")
stargazer(tres, type = "latex", digits = 2)



# ------------------------------------------------ #
# ---- Table 6: Bias-adjusted treatment effect --- #
# ----- Implementing Oster (2019) ---------------- #

# Create panel data
d4 <- pdata.frame(d2, index = c("state", "year"))
d4$time <- as.numeric(d4$year)
d4$mytreat <- d4$treat3*d4$after


# --- Dep Var = level of anti minority hate crimes
# --- Treatment = 1 if BJP was largest party in 2014

# --- Short Regression: Model with no controls
mod1 <- hcam1 ~ after:treat3

res1 <- miceadds::lm.cluster(data = d4,
                             formula = mod1,
                             cluster = "state")
res1_pr <- coeftest(res1, vcov(res1))
res1_pr
res1_obs <- nobs(res1$lm_res)
res1_r2 <- round(summary(res1$lm_res)$r.squared[1],digits = 3)

R0 <- summary(res1$lm_res)$r.squared[1]
beta0 <- coef(res1$lm_res)["after:treat3"]

# --- Intermediate Regression: Model with all controls
mod2 <- hcam1 ~ after + after:treat3 + factor(year) + 
  factor(state) + lpcnsdp + lpopn + urb_sh:after +
  m_sh:after + lit_prop:after + lcrm

res2 <- miceadds::lm.cluster(data = d4,
                             formula = mod2,
                             cluster = "state")
res2_pr <- coeftest(res2, vcov(res2))
res2_obs <- nobs(res2$lm_res)
res2_r2 <- round(summary(res2$lm_res)$r.squared[1],digits = 3)

Rtilde <- summary(res2$lm_res)$r.squared[1]
betatilde <- coef(res2$lm_res)["after:treat3"]   

# Std Dev of dependent variable
sigmay <- sd(d4$hcam1,na.rm = TRUE)

# --- Auxiliary regression
mod3 <- mytreat ~ after + factor(year) + 
  factor(state) + lpcnsdp + lpopn + urb_sh:after +
  m_sh:after + lit_prop:after + lcrm

# Residual
res3 <- miceadds::lm.cluster(data = d4,
                             formula = mod3,
                             cluster = "state")

# Variance of residual of auxiliary regression
taux <- var(res3$lm_res$residuals, na.rm = TRUE)
# Std Dev of dep var in auxiliary regression
sigmax <- sd(d4$mytreat, na.rm = TRUE)


# ---- Cubic Equation
# Notation: ax^3 + bx^2 + cx + d = 0

# Function to compute roots of cubic equation
# This is the cubic on page 193 of Oster (2019)
mybstar <- function(Rmax,mydelta){
  a <- (mydelta -1)*(taux)*(sigmax - taux)
  b <- taux*(beta0 - betatilde)*(sigmax^2)*(mydelta-2)
  c <- (mydelta)*(Rmax - Rtilde)*(sigmay^2)*(sigmax^2 - taux) -
    ((Rtilde - R0)*sigmay^2)*taux - ((beta0- betatilde)^2)*taux*sigmax^2
  d <-  (mydelta)*(Rmax - Rtilde)*(sigmay^2)*(beta0- betatilde)*(sigmax^2) 
  x0 <- cubic(c(a,b,c,d))
  return(x0)
}


# Run the function on different Rmax and Delta
myresults <- rbind(
  mybstar(0.55,1.25),
  mybstar(0.55,1.50),
  mybstar(0.55,1.75),
  mybstar(0.55,2.00),
  mybstar(0.55,5.00),
  
  mybstar(0.65,1.25),
  mybstar(0.65,1.50),
  mybstar(0.65,1.75),
  mybstar(0.65,2.00),
  mybstar(0.65,5.00),
  
  mybstar(0.75,1.25),
  mybstar(0.75,1.50),
  mybstar(0.75,1.75),
  mybstar(0.75,2.00),
  mybstar(0.75,5.00),
  
  mybstar(0.85,1.25),
  mybstar(0.85,1.50),
  mybstar(0.85,1.75),
  mybstar(0.85,2.00),
  mybstar(0.85,5.00),
  
  mybstar(1,1.25),
  mybstar(1,1.50),
  mybstar(1,1.75),
  mybstar(1,2.00),
  mybstar(1,5.00)
)

# Rmax values
A <- c(seq(from=0.55,to=0.85,by=0.1),1)
# Mydelta values
B <- c(seq(from=1.25,to=2.0,by=0.25),5)

# Create matrix to store results
C <- rbind(
  cbind(rep(A[1],length(B)),B),
  cbind(rep(A[2],length(B)),B),
  cbind(rep(A[3],length(B)),B),
  cbind(rep(A[4],length(B)),B),
  cbind(rep(A[5],length(B)),B)
)


# Create data frame of results
myresults_new <- data.frame(
  Rmax = C[,1], Delta = C[,2],
  Root1 = format(Re(myresults[,1]),digits = 3),
  Root2 = format(myresults[,2],digits = 3),
  Root3 = format(myresults[,3],digits = 3),
  BATE = format(betatilde-Re(myresults[,1]),digits = 3)
)

# Latex code of results 
# Note: copy-paste results from screen into Latex file
xtable(myresults_new)



#-------------------------------------------------- #
# ---------------- FIGURES ------------------------ #


# ------- FIGURE 1: Hindutva in Lok Sabha Elections
ls_data <- ls %>%
  mutate(
    Hindutva_SS = 100*(Hindutva_sw/seats_tot)
  ) %>%
  dplyr::select(year,Hindutva_VS,Hindutva_SS) %>%
  dplyr::rename(VoteShare=Hindutva_VS,SeatShare=Hindutva_SS) %>%
  gather("type","hnd",-year) %>%
  as.data.frame()

hndls_plot <- ggplot(
  data = ls_data, aes(x=factor(year), y=hnd, fill=type)
) +
  scale_fill_grey() +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_text(aes(label=round(hnd,digits=0)), vjust=-0.4, color="black",
            position = position_dodge(0.9), size=3)+
  geom_hline(yintercept=50, color="red") +
  labs(
    x="",
    y="percentage",
    fill=""
  ) +
  scale_y_continuous(name = "percentage",breaks = c(0,10,20,30,40,50)) +
  theme_bw() +
  theme(legend.position = "bottom")

pdf("Hindutva-LokSabha.pdf")
print(hndls_plot)
dev.off()



# ---------- FIGURE 2:Total hate crimes over time (Bar chart)
d2_tot <- d2 %>%
  group_by(year) %>%
  summarise(Minorities = sum(hcam1, na.rm = TRUE),
            Muslims = sum(muslim, na.rm = TRUE)) %>%
  gather("category", "hc", -year) %>%
  as.data.frame()

pb1 <- ggplot(data = d2_tot, aes(x=factor(year), y=hc, fill=category)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(
    x="",
    y="number of anti-minority hate crimes, total",
    fill=""
  ) +
  geom_text(aes(label=round(hc,digits=0)), vjust=-0.4, color="black",
            position = position_dodge(0.9), size=3.5)+
  scale_fill_grey() +
  theme_bw() +
  theme(legend.position = "bottom")

pdf("Hate-Crimes-AllIndia.pdf")
print(pb1)
dev.off()  


# -------------------- FIGURE 3: State Scatter Plot 
# Total hate crimes 2009-13 and 2014-18
d2_1 <- d2 %>%
  dplyr::select("state", "year", "hcam1") %>%
  filter(year<=2013) %>%
  group_by(state) %>%
  summarise(hcam1_b = sum(hcam1)) %>%
  dplyr::select("state", "hcam1_b")

d2_2 <- d2 %>%
  dplyr::select("state", "year", "hcam1") %>%
  filter(year>2013) %>%
  group_by(state) %>%
  summarise(hcam1_a = sum(hcam1)) %>%
  dplyr::select("state", "hcam1_a")

d2_3 <- left_join(d2_1, d2_2, by = c("state"))

# Hindutva vote share, Treatment dummy
d2_4 <- d2 %>%
  group_by(state) %>%
  summarise(vsbjp_1 = mean(hvs),
            trt = mean(lparty)) %>%
  mutate(vsbjp = ifelse(!is.na(vsbjp_1), vsbjp_1,0)) %>%
  dplyr::select("state", "vsbjp", "trt") 


d2_5 <- left_join(d2_3, d2_4, by = c("state")) %>%
  arrange(desc(hcam1_a)) 

# Scatter Plot
stateplot <- ggplot(
  data = d2_5, aes(x=vsbjp,y=(hcam1_a-hcam1_b))
) +
  geom_point() +
  geom_smooth(method = lm) +
  labs(
    x="BJP's 2014 Vote Share",
    y="Change in Anti-Minority Hate Crimes"
  ) +
  annotate("text",x=40,y=40,label="Uttar Pradesh") +
  theme_bw()


pdf("State-Scatter.pdf")
print(stateplot)
dev.off()




# ------------ FIGURE 4: Parallel Trends, Visual Evidence

dchrt <- d2 %>%
  dplyr::select("year", "hcam1", "treat3") %>%
  group_by(treat3, year) %>%
  summarise(hcam1_t = mean(hcam1))

p11 <- ggplot(data = dchrt, aes(x = year, y = hcam1_t, group = treat3)) +
  geom_line(aes(linetype = as.factor(treat3))) +
  geom_point() +
  geom_vline(xintercept = 2014, color = "red") +
  labs(
    x="",
    y="anti-minority hate crimes, average"
  ) +
  scale_linetype_discrete(name = "",
                          labels = c("Non-BJP States", "BJP States")) +
  theme_bw() +
  theme(legend.position = "bottom")

pdf("Hate-Crime-DDPlot.pdf")
print(p11)
dev.off()


# -------------- FIGURE 5: Placebo in terms of years

# Matrix to hold results (col1=estimate; col2 = stderr)
a <- matrix(data=NA, nrow = 6, ncol = 2)

# Model 1
res <- miceadds::lm.cluster(
  formula = hcam1 ~ after_b3:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d2, cluster = "state"
)
res_pr <- coeftest(res, vcov. = vcov(res))
a[1,] <- res_pr[c("after_b3:treat3"),c("Estimate","Std. Error")]


# Model 2
res <- miceadds::lm.cluster(
  formula = hcam1 ~ after_b2:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d2, cluster = "state"
)
res_pr <- coeftest(res, vcov. = vcov(res))
a[2,] <- res_pr[c("after_b2:treat3"),c("Estimate","Std. Error")]


# Model 3
res <- miceadds::lm.cluster(
  formula = hcam1 ~ after_b1:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d2, cluster = "state"
)
res_pr <- coeftest(res, vcov. = vcov(res))
a[3,] <- res_pr[c("after_b1:treat3"),c("Estimate","Std. Error")]


# Model 4
res <- miceadds::lm.cluster(
  formula = hcam1 ~ after:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d2, cluster = "state"
)
res_pr <- coeftest(res, vcov. = vcov(res))
a[4,] <- res_pr[c("after:treat3"),c("Estimate","Std. Error")]


# Model 5
res <- miceadds::lm.cluster(
  formula = hcam1 ~ after_a1:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d2, cluster = "state"
)
res_pr <- coeftest(res, vcov. = vcov(res))
a[5,] <- res_pr[c("after_a1:treat3"),c("Estimate","Std. Error")]


# Model 6
res <- miceadds::lm.cluster(
  formula = hcam1 ~ after_a2:treat3 + factor(year) + 
    factor(state) + lpcnsdp + lpopn + urb_sh:after + 
    m_sh:after + lit_prop:after + lcrm,
  data = d2, cluster = "state"
)
res_pr <- coeftest(res, vcov. = vcov(res))
a[6,] <- res_pr[c("after_a2:treat3"),c("Estimate","Std. Error")]




# Create data frame for plot
d1 <- as.data.frame(cbind(2011:2016,a))
colnames(d1) <- c("pd","est","std")

# Create plot
pbplot <- ggplot(data = d1, aes(x=factor(pd), y=est)) +
  geom_point(color="green4", size=2.5)+
  geom_errorbar(aes(ymin=est-1.96*std, ymax=est+1.96*std),
                width=.2,position=position_dodge(0.05)) +
  geom_hline(yintercept = 0, color="red") +
  labs(
    x="",
    y="Estimate (with 95% CI)"
  ) +
  theme_bw()

pdf("Placebo-Years-Plot.pdf")
print(pbplot)
dev.off()






